#include <stdio.h>
#include <stdlib.h>
#include <string.h>



// Fold positions closer than this many bases to either end of a probe are not
// examined by SecondStruct().
#define MATCH_NEEDED_IN_2ND 3

// Probe length in bases. Compile-time constant: recompile when changed.
#define LEN_MER 8

// Returns 1 if the probe can fold onto itself (secondary structure), else 0.
int SecondStruct(const char *str);

// Highest weighted match score found while sliding str2 along str1.
int CrossHyb(const char *str1, const char *str2, int overlap_length);
// Number of positions at which the two probes carry matching bases.
int SimpleMatch(const char *str1, const char *str2);
// 'T' if the first N bases of the two probes match, 'F' otherwise.
char FirstN(const char *str1, const char *str2, int N);

// Extra score CrossHyb() adds for every matched g-g or c-c pair (-gc_add).
int GC_ADDITION = 1;
// Number of G/C bases a probe must contain to pass the GC test (-gc).
int NUM_GC = 4;

// Fold score at or above which a probe is rejected for secondary structure
// (-2). The default is the score of three consecutive matches: 1+2+4.
int SCORE_NEEDED_IN_2ND = 7;
// Optional extra pair of bases treated as equivalent when comparing two
// probes (-eq); main() sets both before they are used.
char eq1, eq2;

// Report stream; replaced by the -o file when that flag is given.
FILE *fp = stdout;
// Log stream for per-probe diagnostics; replaced by "<output_file>.log"
// when -o is given and that file can be created.
FILE *fplog = stderr;

main(int argc, char **argv) 

{ 

int ii, jj, comp_score, s; 

int MM[LENMER]; 

char line[256], str [LEN _MER+ 1 ] ; 

int gcsum, passage, total_probes, failed_fh, failed_ch, failed_sm; 
char convert[4J; // 0123 to atcg convertion. 
char *compatible; 

int max_prb, cnt_prb; 
char**probe; 

int max_snd, cnt_snd; 
char **sndstr; 
int *snd_matchcnt; 

int SIMPLE_CUTOFF = 5; // reject if this many bps match to each other, 

// no matter where they are located. 
intCROSSHYB_CUTOFF = 9; // 1+2+4 + 2 
int CROSSHYBJ3VERLAP = 5; 

int FIRST_N « 4; 



eql =eq2 = ' 
if(argc= 1) 

{ 




fprintf(stderr, 

"Usage: %s -o output_file[stdout]\n", 

argv[0]); 
fprintf(stderr, 

n \t\t-gc number_of_GCs_in_probe[%d]\n", 

NUMJ3C); 
fprintf(stderr, 

"\t\t-2 secondary_structure_reject(including this value)[%d]\n", 

SCORE__NEEDED_IN_2ND); 
fprintf(stderr, 

"\t\t-ch crosshyb_reject(including this value)[%d]\n", 

CROSSHYB_CUTOFF); 
fprintf(stderr, 

"\t\t-sm simple_match_reject(including this value)[%d]\n", 

SIMPLE CUTOFF); 
fprintf(stderr, 

"\t\t-ol crosshyb_overlapJength[%d]\n", 

CROSSHYB_OVERLAP); 
fprintf(stderr, 

"\t\t-eq add'Lequiv^bp^in^ompat^checking^/ocyoc] (e.g., -eq gt)\n M , 
eql,eq2); 

fprintf(stderr, "\t\t-fn jBistJ4 Jcngth[%d]\n", FIRST_N); 
fprintf(stderx, "\t\t-gc_add GC_add^enalty[%d]\rT, GC_ADDITION); 

exit(O); 

} 

// parse input parameters. 
n-1; ' 

while(ii < argc) 
{ 

if(strcmp(argv[ii], "-gc") == 0) 
sscanf(argv[ii+l], "%d", &NUM_GC); 

else if(strcmp(argv[ii], "-2") — 0) 
sscanf(argv[ii+l], "%d", &SCORE_NEEDED_IN_2ND); 

else if(strcmp(argv[ii], "-ch") — 0) 
sscanf(argv[ii+l], "%d", &CROSSHYB_CUTOFF); 

else if(strcmp(argv[ii], "-ol") — 0) 
sscanf(argv[ii+l], "%d", &CROSSHYB_OVERLAP); 

else if(strcmp(argv[ii], "-eq") — 0) 

' FIG. 16B 



if(strlen(argv[ii+l]) = 2 || 
(strlen(argv[ii+l]) = 3 && argv[ii+l][2] — W)> 

{ 

eql=argv[ii+lj[0]; 
eq2 = argv[ii+l][l]; 

} 

else 
< 

fprintf(stderr, "\nERROR: Invalid string after -eq flag.\n\n"); 
exit(l); 

} 

} 

else if(strcmp(argv[ii], "-o") = 0) 
{ 

if((fp = fopen(argv[ii+l], "w")) = NULL) 
{ 

fprintf(stderr, "Can't open file %s to writeAn", argy[ii+l]); 
exit(l); 

>• 

char logname[ 128]; 

sprintf(logname, "%s.log", argv[ii+l ]); 
if((fylog = fopen(logname, "w")) ==NUtL) 
{ 

fprintfl[stderr, "failed creating log. stderr usedAn"); 
fplog = stderr; 

} 

} 

else if(strcmp(argv[ii], "-fii") = 0) 
{ 

sscanf(argv[ii+l], "%d", &FIRSTJM); 

} 

else if(strcmp(argv[ii], "-sm") = 0) 
{ 

sscanftargvfii+l], "%d", &SIMPLE_CUTOFF); 

} 

else if(strcmp(atgv[ii], "-gc_add ! ') — 0) 
{ 

sscanf(argv[ii+l], "%d", &GC_ADDITION); 

} 
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else 

{ 



fprintf(stderr, H Unknow flag %s\n", argv[ii]); 
exit(l); 

} 

ii+=2; 

} 

max_prb « 30000; 
probe « new char* [max_prb]; 
for(ii = 0; ii < max_prb; ii++) 
probepi] - new char [LENJvflER+1]; 
cnt__prb = 0; 

max_snd = 5000; 

sndstr = new char* [max_snd]; 

for(ii = 0; ii < max_snd; ii++) 

sndstrfii] = new char [LEN_MER+1]; 
snd_matchcnt = new int [max_snd]; 
cnt_snd = 0; 

// build an array of probes. Each probe is of length LEN^MER, 
// of which , NUM_GC are Gs or Cs. 

convert[0] = 'a'; 
convert[l] - V; 
convert[2] = 'c f ; 
convert[3] = f g r ; 

total jprobes = 0; 

passage = 0; // number of probes pass GC test 
for(MM[0] = 0; MM[0] < 4; MM[0]++) 
for(MM[l] = 0; MM[1] < 4; MM[1]++) 
for(MM[2] = 0; MM[2] < 4; MM[2]++) 
for(MM[3] = 0; MM[3] < 4; MM[3]++) 

for(MM[4] = 0; MM[4] < 4; MM[4]++) //*things to pay attention to. 
for(MM[5] = 0; MM[5] < 4; MM[5]++) //*things to pay attention to. 
for(MM[6] = 0; MM[6] < 4; MM[6]++) //*things to pay attention to. 
for(MM[7] - 0; MM[7] < 4; MM[7]++) //*things to pay attention to. 
{ 

total_probes++; 
gesum = 0; 



// build a probe. 




for(jj = 0; jj < LEN_MER; jj++) 
{ 

strQj] = convertfMMQj]]; 
ifCstrDJl = 'C || strQj] = ' g -) 
gcsum++; 

} 

str[LEN_MER] = \0'; 

// check its GC contents and secondary structure. 

if(gcsum = NUM_GC) 

{ 

pass _gc++; 

fprintf(fplog, "pass GCtest: %s\n M , str); 
if(!SecondStruct(str)) 

r 

strcpy(probe[cnt jrb], str); 
if(++cnt_prb = max_prb) 
{ 

// should relocate memory. 

// To simplefy the program, let's just give an error msg. 

fprintf(stderr, "ERROR: Probe array is too small, cnt _prb is %d\n M , cnt_prb); 

exit(l); 

} 

} 

else 
{ 

// record the rejected string 
strcpy(sndstr[cnt_snd], str); 
if(++cnt_snd — max_snd) 

{ 

fprintf(stderr, "ERROR: Secondary Structure array is too small. cnt_snd = %d\n", 

cnt_snd); 

exit(l); 

} 

} 

} 



fprintfi(fp, "\n%d mer probe selection\n", LEN_MER); 
fprintf(fp, "Number of GCs in the probes: %d\n", NUM_GC); 
fprintf(fp, "Score to reject as secondary structure: %d\n", 

SCORE_NEEDED_IN_2ND); 
fprintf(fp, "Score to reject as incompatible: %d\n", CROSSHYB_CUTOFF); 
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fprintfCfp, "Compatible test overlap: %d\n M , CROSSHYB ^OVERLAP); 
fprintf(fp, "Additional equivalent base-pair in compatibility checking: %c%c\n", 
eql,eq2); 

fprintf(fp, "Simple match cutoff value(including): %d\n", SIMPLEJZUTOFF); 
fprintf(fp, "First N value(including): %d\n M , FIRSTJN); 
fprintf(fp, "Additional penalty for G or C: %d\n", GC_ADDITION); 
fprintf(§>, "\n\n"); 

fprintf(fp, "Total possible %d mers: %d\n M , LENJvfER, total_probes); 

fprintf(fp, "Number passed GCtest : %d\n", passage); 

fprintf(fp, "Number passed secondary structure test : %d\n", cnt_prb); 
// for(ii = 0; ii < cnt_snd; ii++) 
// fprintf(fp, "%s\n", sndstr[ii]); 



// From the set (call it setl) of probes which passed GC and 2nd structure 
// tests, choose a probe into the final set(set2). Then compare this 
// probe against all the probes left in setl and throught out the ones 
// that may crosshyb to this probe. From what's left in setl, choose 
// another probe and compary it to the rest of set 1 ... 

compatible = new char [cnt_prb]; 
for(ii = 0; ii < cnt_prb; ii-H-) 
{ 

compatiblefii] = T; 

} 

// Compatibility check #1: Use weighted scores to penalize neighboring matches. 
// first jrnatch_score = 1 ; 

// if prev pair is a match, current_match_score = prev_match_score*2. 
ii = 0; 

failed_ch = 0; 
while(ii < cnt_prb) 

{ 

for(jj = ii+1 ; jj < cnt_prb; jj++) 
{ 

if(compatibleOj] — T && 
(s=CrossHyb(probe[ii],probeQj],CROSSHYB_OVERLAP)) >= CROSSHYB_CUTOFF) 

{ 

compatibleQj] = *F; 
failed_ch++; 

fprintf(fplog, M Rejected(%d) %s in slide test for %s\n", 
s, probeQj], probe[ii]); 

FIG. 16F 



while(ii < cnt_prb && compatiblepi] = 'F) 
ii++; 

} 

fprintf(fp, ''Number of probes passed compatibility test: %d\n", 
cnt_prb - failed_ch); 

// Compatibility check #2: Use unweighted score: count unconsecutive matches 

// find the first 'passed* probe. 
ii-0; 

while(ii < cnt _prb && compatiblepi] = T) 

ii++; 
failed_sm = 0; 
while(ii < cnt_prb) 
{ 

for(ij = ii+1 ; jj < cnt_prb; 
{ 

if(compatibleQj] = T && 
(s-SimpIeMatch(probe[ii],probeQj])) >= SIMPLE_CUTOFF) 

{ 

compatibleQj] = T; 

fprintf(fplog, "Rejected(%d) %s in simple_match test for %s\n w , 

s, probeQj], probepi]); 
failed_sm-H-; 



ii-i-f; 

while(ii < cnt jprb && compatiblepi] = T f ) 
ii++; 

} 

fprintf(fp, "Number of probes passed simple match test: %d\n'\ 
cnt_prb - failed_ch - failed_sm); 



// Compatibility check #3: if the first N bases match ANYWHERE in another probe. 

// find the first 'passed' probe, 
ii = 0; 

while(ii < cnt_prb && compatiblepi] — T') 

ii++; 
failed_fo = 0; 



while(ii<cnt_prb) 




{ 

forQj = ii+1; jj < cnt_prb; jj++) 
{ 

if(compatibleQj] = T && 
FirstN(probe[ii], probeQj], FIRSTJN) = T) 

{ 

compadbleQjl^T'; 
failed_m-H-; 

fprintf(fplog, "Rejected %s in FIRSTN test for %s\n n , 
probeDj], probeLii]); 

} 

} 

ii++; 

while(ii < cnt_prb && compatiblepi] = T) 
ii++; 

} 

fprint^fp, "Number of probes passed FIRSTN compatibility test: %d\n", 
cnt_prb - failed_ch - failed_sm - failed_m); 

// output. 
jj = 0; 

fprintf(fp, "\nSelected probes are: \n"); 

for(ii - 0; ii < cnt_prb; ii++) 

{ 

if(compatible[ii] = T) 
{ 

fprintf(fp, "%s \n", probe[ii]); 

} 

} 

} 



// Watson-Crick complement of a lowercase base. Any other character maps to
// '\0', which never compares equal to a base inside a probe string.
static char ComplementBase(char base)
{
    switch (base)
    {
    case 'a': return 't';
    case 't': return 'a';
    case 'c': return 'g';
    case 'g': return 'c';
    default:  return '\0';
    }
}

// Weighted score of one fold of 'str'.
//
// Position jj (0..last_left) is paired with position pair_sum - jj; pairs
// whose partner lies beyond the end of the string are skipped. A pair
// matches when the two bases are complementary. The first match of a run
// scores 1 and each further consecutive match doubles the previous score
// (1, 2, 4, ...); a mismatch ends the run. Returns the sum of all scores.
static int FoldScore(const char *str, int len, int last_left, int pair_sum)
{
    int sum = 0;
    int prev_score = 0; // 0 means the previous pair did not match
    int jj;

    for (jj = 0; jj <= last_left; jj++)
    {
        const int kk = pair_sum - jj;
        int score = 0;

        if (kk < len && str[jj] == ComplementBase(str[kk]))
            score = (prev_score > 0) ? prev_score * 2 : 1;

        sum += score;
        prev_score = score;
    }
    return sum;
}

// Check if 'str' contains a secondary structure. That is, if folding 'str'
// back on itself pairs complementary bases (a-t, c-g) strongly enough that
// the weighted fold score reaches SCORE_NEEDED_IN_2ND. With the default of
// 7 (1+2+4) that means three consecutive matching pairs.
//
// Two kinds of fold are tried at every position that is at least
// MATCH_NEEDED_IN_2ND bases away from both ends:
//   - the base at 'ii' sits at the turn (jj pairs with 2*ii - jj);
//   - the turn falls between 'ii' and 'ii+1' (jj pairs with 2*ii + 1 - jj).
//
// Every run starts with no match in progress, so the first matching pair of
// a fold scores 1. The scanned listing appears to start with the run flag
// already set, which made a match on the first pair read the score before
// the start of the score array.
//
// return 1 if found secondary structure, 0 otherwise.
int SecondStruct(const char *str)
{
    const int len = (int)strlen(str);
    int ii;

    for (ii = MATCH_NEEDED_IN_2ND; ii < len - MATCH_NEEDED_IN_2ND; ii++)
    {
        if (FoldScore(str, len, ii - 1, ii * 2) >= SCORE_NEEDED_IN_2ND)
            return 1; // Found a 2nd structure.
    }

    for (ii = MATCH_NEEDED_IN_2ND - 1; ii < len - MATCH_NEEDED_IN_2ND; ii++)
    {
        if (FoldScore(str, len, ii, ii * 2 + 1) >= SCORE_NEEDED_IN_2ND)
            return 1; // Found a 2nd structure.
    }

    return 0; // No 2nd structure.
}



// check if strl and str2 can hybridizy together. 
// return the max of match scores. 
// Assume strlen(strl) = strlen(str2). 

int CrossHyb(const char *strl, const char *str2, int overlap) 
{ 

int ii, jj, len, sum, score, prev_score, max_sum, numGC; 
char prevjnatch; 

len = strlen(strl); 
max_sum = 0; 

fprintf(fplog, "Sliding test between %s and %s\n n , strl, str2); 

for(ii = overlap-ien; ii <- len-overlap; ii++) 
{ 

numGC = 0; 
sum = 0; 

score = prevjcore = 0; 
prev_match = 'F; 
fprintf(fplog, "Compare "); 
for(jj = ii; jj < len && jj - ii < len; jj++) 
{ 

if(jj>=0&&jj-ii >= 0) 
{ 

fprintf(fplog, "(%c,%c) strlQj], str2Qj-ii]); 
if((strl[jj] = str2Dj.ii]) || 
(strl [jj] — eql && str2Qj-ii] = eq2) || 
(strlQj] — eq2 && str2[jj-ii] = eql)) 

{ if(((strl [jj]|32) = 'g' && (str2Qj-ii]|32) = ' g ') || 
((strl[5j]|32) — V && (str2Qj-u]|32) = V)) 
numGC++; 

if(prevjnatch = T) 
{ 

Score = prev_score*2; 

} 

else 

{ 

score = 1; 
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prev_match = T; 

} 

} 

else 

{ 

score = 0; 
prev_match = 'F; 

} 

sum += score; 
prev_score = score; 

} 

} 

fprintf(fplog, "Score=%d\n M , sum + numGC*GC_ADDITION); 

if(sum + numGC* GC_ADDITION > max_sum) 
max_sum = sum + numGC*GC_ADDITION; 

} 

fprintf(fplog, "Max score is %d\n", max_sum); 
return max_sum; 

} 

// Compare 2 strings base to base, 0 to 0, 1 to 1 no sliding. 

// return number of matches. 

// Assume strlen(strl) = strlen(str2). 

int SimpieMatch(const char *strl, const char *str2) 
{ 

int ii, sum; 
sum = 0; 

for(ii = 0; ii < strlen(strl); ii++) 
{ 

if((strl[ii] = str2[ii]) || 
(strl [ii] = eql && str2[ii] = eq2) || 
(strl [ii] = eq2 && str2[ii] = eql)) 

{ 

sum++; 

} 

} 

return sum; 
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// Check if the first N bases of the two probes are identical, 
char FirstN(const char *strl, const char *str2, int N) 

{ 

int ii; 

char match = T; 

if(N>strlen(strl)) 
return *F; 

for(ii = 0;ii<N;ii-H-) 
{ 

if(!((strl[ii] = str2[ii])|| 
(strlfii] == eql && str2[ii] = eq2) || 
(strl[ii] = eq2 && str2[ii] = eql))) 

{ 

match = 'F; 
■ break; 
} 

} 

return match; 
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Combine resin supports, 
Mix and aliquot into new 
synthesis columns 





Synthesize the nucleotide A on a column 
and in parallel synthesize G, C and T 
each on their own oligo synthesis 
columns. 
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After second round of synthesis there are a 
total of 16 di-nucleotide sequences. If 
columns were combined, mixed, aliquoted, 
and followed by a third round of synthesis, a 
total of 64 different tri-nucleotides would be 
generated. 
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Reisolate plasmid library after passage from induced 
and non induced grown cells. Keep libraries separate. 
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1 



FITC label Starting 
Library RNA 



1 



Rhodamine label 
Passaged Library 




Combine and hybridize RNA from 
libraries to ID Tag Beads 




Sequence and clone Perturbagen 
DNA back into expression construct 
and validate effect 



t 



Pert 



Use specific ID-Tag primer and a 
common vector primer juxtaposed 
to perturbagen to amplify 
corresponding perturbagen 



t 



Sequence ID Tag and 
generate complement oligo 



RT-PCR amplify RNA off beads 
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Sort single FITC+ beads 
into 96 well PCR Plate 





FITC Fluorescence 



Sort all Beads on Cell sorter to identify Beads 
labeled only with FITC+ RNA 



